R packages

Install packages

To install the required packages to run the code below please execute the following code.

# Install any dependency that is not already present.
# Note: membership must be tested against the installed package NAMES
# (rownames of installed.packages()), not the whole matrix — the matrix
# also contains versions, paths, etc., which could match accidentally.
deps <- c("gelnet", "dplyr", "gdata")
for (pkg in deps) {
  if (!pkg %in% rownames(installed.packages())) {
    install.packages(pkg, dependencies = TRUE)
  }
}

Loading required libraries

library(gelnet)
library(dplyr)
library(gdata)

Auxiliary files

# load PCBC metadata (contains group info)
# pcbc.pd.f: sample annotation; its Diffname_short column defines the
# SC / non-SC split and its UID column names the data columns (used below)
load("pcbc.pd.f.Rda")
pcbc.pd.f
# load PCBC 450K data (subset of 219 probes of interest)
# pcbc.data: methylation matrix, probes in rows and samples in columns
# (columns are later selected by sample UID)
load("pcbc.data.Rda")
pcbc.data
## Mean-center the data: subtract each probe's mean across all samples.
## rowMeans() is the idiomatic (and faster) equivalent of apply(x, 1, mean);
## recycling of the length-nrow vector down the columns centers each row.
m <- rowMeans(pcbc.data)
pcbc.data.2 <- pcbc.data - m

# Define PCBC groups (SC and non.SC)
M1_smp <- pcbc.pd.f[pcbc.pd.f$Diffname_short %in% "SC", ]    # SC
M2_smp <- pcbc.pd.f[!(pcbc.pd.f$Diffname_short %in% "SC"), ] # non-SC

# Select PCBC data columns by sample UID
X.tr <- pcbc.data.2[, as.character(M1_smp$UID)] # 44 samples
X.bk <- pcbc.data.2[, as.character(M2_smp$UID)] # 55 samples

## Train a one-class model on the SC samples
## (NULL response selects gelnet's one-class task; penalties l1 = 0, l2 = 1)
mm <- gelnet(t(X.tr), NULL, 0, 1)
Training a one-class model
Iteration 1 : f = 0.6931472 
Iteration 2 : f = 0.3320004 
Iteration 3 : f = 0.3263175 
## Store the signature to a file
# The fitted gelnet model 'mm' is reloaded later to score the TCGA data
save(mm, file = "pcbc-stemsig.p219.Rda")
# Cross-validation with linear model:
## Perform leave-one-out cross-validation over the training (SC) samples.
## For each held-out sample, a one-class model is fit on the rest and the
## AUC is the fraction of background (non-SC) samples it outscores.

## Align the background to the training probes once — this subsetting is
## loop-invariant, so hoist it out of the loop (final X.bk is identical)
X.bk <- as.matrix(X.bk[rownames(X.tr), ])

auc <- numeric(ncol(X.tr))  # preallocate instead of growing with c()
for (i in seq_len(ncol(X.tr))) {
  ## Train a kernel one-class model on non-left-out data
  X1 <- as.matrix(X.tr[, -i])
  K <- crossprod(X1) / nrow(X1)  # crossprod(X1) == t(X1) %*% X1
  m1 <- gelnet.ker(K, NULL, lambda = 1)
  ## Recover linear weights over probes from the kernel solution
  w1 <- X1 %*% m1$v

  ## Score the background and the left-out sample with the linear model;
  ## drop() flattens the 1 x n score matrices to plain vectors so the
  ## elementwise comparison below recycles correctly
  s.bk <- drop(t(w1) %*% X.bk)
  s1 <- drop(t(w1) %*% X.tr[, i])

  ## AUC = P( left-out sample is scored above the background )
  auc[i] <- sum(s1 > s.bk) / length(s.bk)
  cat("Current AUC: ", auc[i], "\n")
  ## Running mean over the folds completed so far
  cat("Average AUC: ", mean(auc[seq_len(i)]), "\n")
}
Training a one-class model
Iteration 1 : f = 0.6931472 
Iteration 2 : f = 0.6879741 
Current AUC:  1 
Average AUC:  1 
Training a one-class model
Iteration 1 : f = 0.6931472 
Iteration 2 : f = 0.6878262 
Current AUC:  1 
Average AUC:  1 
Training a one-class model
Iteration 1 : f = 0.6931472 
Iteration 2 : f = 0.6879694 
Current AUC:  1 
Average AUC:  1 
Training a one-class model
Iteration 1 : f = 0.6931472 
Iteration 2 : f = 0.6879539 
Current AUC:  1 
Average AUC:  1 
Training a one-class model
Iteration 1 : f = 0.6931472 
Iteration 2 : f = 0.687956 
Current AUC:  1 
Average AUC:  1 
Training a one-class model
Iteration 1 : f = 0.6931472 
Iteration 2 : f = 0.6879825 
Current AUC:  1 
Average AUC:  1 
Training a one-class model
Iteration 1 : f = 0.6931472 
Iteration 2 : f = 0.6879637 
Current AUC:  1 
Average AUC:  1 
Training a one-class model
Iteration 1 : f = 0.6931472 
Iteration 2 : f = 0.6879255 
Current AUC:  1 
Average AUC:  1 
Training a one-class model
Iteration 1 : f = 0.6931472 
Iteration 2 : f = 0.6879657 
Current AUC:  1 
Average AUC:  1 
Training a one-class model
Iteration 1 : f = 0.6931472 
Iteration 2 : f = 0.6879444 
Current AUC:  1 
Average AUC:  1 
Training a one-class model
Iteration 1 : f = 0.6931472 
Iteration 2 : f = 0.6879578 
Current AUC:  1 
Average AUC:  1 
Training a one-class model
Iteration 1 : f = 0.6931472 
Iteration 2 : f = 0.6879635 
Current AUC:  1 
Average AUC:  1 
Training a one-class model
Iteration 1 : f = 0.6931472 
Iteration 2 : f = 0.6879582 
Current AUC:  1 
Average AUC:  1 
Training a one-class model
Iteration 1 : f = 0.6931472 
Iteration 2 : f = 0.6879487 
Current AUC:  1 
Average AUC:  1 
Training a one-class model
Iteration 1 : f = 0.6931472 
Iteration 2 : f = 0.687947 
Current AUC:  1 
Average AUC:  1 
Training a one-class model
Iteration 1 : f = 0.6931472 
Iteration 2 : f = 0.6879496 
Current AUC:  1 
Average AUC:  1 
Training a one-class model
Iteration 1 : f = 0.6931472 
Iteration 2 : f = 0.6879388 
Current AUC:  1 
Average AUC:  1 
Training a one-class model
Iteration 1 : f = 0.6931472 
Iteration 2 : f = 0.6879559 
Current AUC:  1 
Average AUC:  1 
Training a one-class model
Iteration 1 : f = 0.6931472 
Iteration 2 : f = 0.6879396 
Current AUC:  1 
Average AUC:  1 
Training a one-class model
Iteration 1 : f = 0.6931472 
Iteration 2 : f = 0.6879534 
Current AUC:  1 
Average AUC:  1 
Training a one-class model
Iteration 1 : f = 0.6931472 
Iteration 2 : f = 0.6878202 
Current AUC:  1 
Average AUC:  1 
Training a one-class model
Iteration 1 : f = 0.6931472 
Iteration 2 : f = 0.6879763 
Current AUC:  1 
Average AUC:  1 
Training a one-class model
Iteration 1 : f = 0.6931472 
Iteration 2 : f = 0.6879253 
Current AUC:  1 
Average AUC:  1 
Training a one-class model
Iteration 1 : f = 0.6931472 
Iteration 2 : f = 0.6879475 
Current AUC:  1 
Average AUC:  1 
Training a one-class model
Iteration 1 : f = 0.6931472 
Iteration 2 : f = 0.6878935 
Current AUC:  1 
Average AUC:  1 
Training a one-class model
Iteration 1 : f = 0.6931472 
Iteration 2 : f = 0.6879382 
Current AUC:  1 
Average AUC:  1 
Training a one-class model
Iteration 1 : f = 0.6931472 
Iteration 2 : f = 0.6879424 
Current AUC:  1 
Average AUC:  1 
Training a one-class model
Iteration 1 : f = 0.6931472 
Iteration 2 : f = 0.6879736 
Current AUC:  1 
Average AUC:  1 
Training a one-class model
Iteration 1 : f = 0.6931472 
Iteration 2 : f = 0.6879895 
Current AUC:  1 
Average AUC:  1 
Training a one-class model
Iteration 1 : f = 0.6931472 
Iteration 2 : f = 0.6879299 
Current AUC:  1 
Average AUC:  1 
Training a one-class model
Iteration 1 : f = 0.6931472 
Iteration 2 : f = 0.6879542 
Current AUC:  1 
Average AUC:  1 
Training a one-class model
Iteration 1 : f = 0.6931472 
Iteration 2 : f = 0.6879551 
Current AUC:  1 
Average AUC:  1 
Training a one-class model
Iteration 1 : f = 0.6931472 
Iteration 2 : f = 0.6879409 
Current AUC:  1 
Average AUC:  1 
Training a one-class model
Iteration 1 : f = 0.6931472 
Iteration 2 : f = 0.6879349 
Current AUC:  1 
Average AUC:  1 
Training a one-class model
Iteration 1 : f = 0.6931472 
Iteration 2 : f = 0.6879832 
Current AUC:  1 
Average AUC:  1 
Training a one-class model
Iteration 1 : f = 0.6931472 
Iteration 2 : f = 0.68797 
Current AUC:  1 
Average AUC:  1 
Training a one-class model
Iteration 1 : f = 0.6931472 
Iteration 2 : f = 0.6879832 
Current AUC:  1 
Average AUC:  1 
Training a one-class model
Iteration 1 : f = 0.6931472 
Iteration 2 : f = 0.6879171 
Current AUC:  1 
Average AUC:  1 
Training a one-class model
Iteration 1 : f = 0.6931472 
Iteration 2 : f = 0.6879451 
Current AUC:  1 
Average AUC:  1 
Training a one-class model
Iteration 1 : f = 0.6931472 
Iteration 2 : f = 0.6879434 
Current AUC:  1 
Average AUC:  1 
Training a one-class model
Iteration 1 : f = 0.6931472 
Iteration 2 : f = 0.6879494 
Current AUC:  1 
Average AUC:  1 
Training a one-class model
Iteration 1 : f = 0.6931472 
Iteration 2 : f = 0.6879603 
Current AUC:  1 
Average AUC:  1 
Training a one-class model
Iteration 1 : f = 0.6931472 
Iteration 2 : f = 0.687948 
Current AUC:  1 
Average AUC:  1 
Training a one-class model
Iteration 1 : f = 0.6931472 
Iteration 2 : f = 0.687957 
Current AUC:  1 
Average AUC:  1 
# 
## Uses the signature to score PanCan33 data
# load TCGA 450K data (subset of 219 probes of interest)
load("data.pan.Rda") 
data.pan
# load tumor-type annotation; used below for per-type NA imputation
load("type.info.Rda") #(contains tumor type info)
type.info
# Function to replace NA values with median of probe values by tumor type
# (replace.NA is defined in the sourced script replaceNA.R)
source("replaceNA.R")
testset <- replace.NA(data.pan, type.info, by="median") 
## Load the trained stemness signature (object 'mm' with weight vector w)
load("pcbc-stemsig.p219.Rda")
w <- mm$w

## Restrict the test set to the signature probes, in signature order
X <- as.matrix(testset[as.character(names(w)), ])

## Score every sample via the linear model (weighted sum over probes)
raw <- drop(t(w) %*% X)

## Rescale the scores to the [0, 1] interval
ss <- (raw - min(raw)) / (max(raw) - min(raw))

## One row per sample (row names = sample IDs), single column "mDNAsi"
ss <- data.frame(mDNAsi = ss)
save(ss, file="TCGA_mDNAsi.Rda")
LS0tCnRpdGxlOiAiT25lIGNsYXNzIG1vZGVsIC0gNDUwSyBkYXRhIgphdXRob3I6ICJUYXRoaWFuZSBNYWlzdHJvIE1hbHRhIgpvdXRwdXQ6CiAgaHRtbF9ub3RlYm9vazoKICAgIGhpZ2hsaWdodDogdGFuZ28KICAgIHRoZW1lOiBqb3VybmFsCiAgICB0b2M6IHllcwogICAgdG9jX2Zsb2F0OgogICAgICBjb2xsYXBzZWQ6IG5vCiAgaHRtbF9kb2N1bWVudDoKICAgIHRvYzogeWVzCmVkaXRvcl9vcHRpb25zOgogIGNodW5rX291dHB1dF90eXBlOiBpbmxpbmUKLS0tCgojIFIgcGFja2FnZXMKCiMjIEluc3RhbGwgcGFja2FnZXMKClRvIGluc3RhbGwgdGhlIHJlcXVpcmVkIHBhY2thZ2VzIHRvIHJ1biB0aGUgY29kZSBiZWxvdyBwbGVhc2UgZXhlY3V0ZSB0aGUgZm9sbHdpbmcgY29kZS4KYGBge3IgZXZhbCA9IEZBTFNFfQpkZXBzIDwtIGMoImdlbG5ldCIsImRwbHlyIiwiZ2RhdGEiKQpmb3IocGtnIGluIGRlcHMpICBpZiAoIXBrZyAlaW4lIGluc3RhbGxlZC5wYWNrYWdlcygpKSBpbnN0YWxsLnBhY2thZ2VzKHBrZywgZGVwZW5kZW5jaWVzID0gVFJVRSkKYGBgCgojIyBMb2FkaW5nIHJlcXVpcmVkIGxpYnJhcmllcwpgYGB7ciAsZWNobyA9IFRSVUUsIG1lc3NhZ2UgPSBGQUxTRSwgd2FybmluZyA9IEZBTFNFfQpsaWJyYXJ5KGdlbG5ldCkKbGlicmFyeShkcGx5cikKbGlicmFyeShnZGF0YSkKYGBgCgojIEF1eGlsaWFyeSBmaWxlcwoKYGBge3J9CiMgbG9hZCBQQ0JDIG1ldGFkYXRhIChjb250YWlucyBncm91cCBpbmZvKQpsb2FkKCJwY2JjLnBkLmYuUmRhIikKcGNiYy5wZC5mCmBgYAoKYGBge3J9CiMgbG9hZCBQQ0JDIDQ1MEsgZGF0YSAoc3Vic2V0IG9mIDIxOSBwcm9iZXMgb2YgaW50ZXJlc3QpCmxvYWQoInBjYmMuZGF0YS5SZGEiKQpwY2JjLmRhdGEKYGBgCgpgYGB7ciBlY2hvID0gVFJVRSwgbWVzc2FnZSA9IEZBTFNFLCB3YXJuaW5nID0gRkFMU0V9CiMjIE1lYW4tY2VudGVyIHRoZSBkYXRhCm0gPC0gYXBwbHkocGNiYy5kYXRhLCAxLCBtZWFuICkKcGNiYy5kYXRhLjIgPC0gcGNiYy5kYXRhIC0gbQoKIyBEZWZpbmUgUENCQyBncm91cHMgKFNDIGFuZCBub24uU0MpCk0xX3NtcCA8LSBwY2JjLnBkLmZbcGNiYy5wZC5mJERpZmZuYW1lX3Nob3J0ICVpbiUgIlNDIixdICNTQwpNMl9zbXAgPC0gcGNiYy5wZC5mWyEocGNiYy5wZC5mJERpZmZuYW1lX3Nob3J0ICVpbiUgIlNDIiksXSAjbm9uLVNDCgojIFNlbGVjdCBQQ0JDIGRhdGEKWC50ciA8LSBwY2JjLmRhdGEuMlssIGFzLmNoYXJhY3RlcihNMV9zbXAkVUlEKV0gIyA0NCBzYW1wbGVzClguYmsgPC0gcGNiYy5kYXRhLjJbLCBhcy5jaGFyYWN0ZXIoTTJfc21wJFVJRCldICMgNTUgc2FtcGxlcwoKCiMjIFRyYWluIGEgb25lLWNsYXNzIG1vZGVsCm1tIDwtIGdlbG5ldCggdChYLnRyKSwgTlVMTCwgMCwgMSApICNOVUxMIGZvciBhIG9uZS1jbGFzcyB0YXNrIAoKIyMgU3RvcmUgdGhlIHNpZ25hdHVyZSB0byBhIGZpbGUKc2F2ZShtbSwgZmlsZSA9ICJwY2JjLXN0ZW1zaWcucDIxOS5S
ZGEiKQoKIyBDcm9zcy12YWxpZGF0aW9uIHdpdGggbGluZWFyIG1vZGVsOgojIyBQZXJmb3JtIGxlYXZlLW9uZS1vdXQgY3Jvc3MtdmFsaWRhdGlvbgphdWMgPC0gYygpCmZvciggaSBpbiAxOm5jb2woWC50cikgKSB7CiAgIyMgVHJhaW4gYSBtb2RlbCBvbiBub24tbGVmdC1vdXQgZGF0YQogIFgxIDwtIFgudHJbLC1pXQogIFgxIDwtIGFzLm1hdHJpeChYMSkKICBLIDwtIHQoWDEpICUqJSBYMSAvIG5yb3coWDEpCiAgbTEgPC0gZ2VsbmV0LmtlciggSywgTlVMTCwgbGFtYmRhPTEgKQogIHcxIDwtIFgxICUqJSBtMSR2CiAgCiAgIyMgU2NvcmUgdGhlIGxlZnQtb3V0IHNhbXBsZSBhZ2FpbnN0IHRoZSBiYWNrZ3JvdW5kCiAgWC5iayA8LSBYLmJrW3Jvd25hbWVzKFgudHIpLF0KICBYLmJrIDwtIGFzLm1hdHJpeChYLmJrKQogIHMuYmsgPC0gdCh3MSkgJSolIFguYmsKICBzLmJrIDwtIHVubWF0cml4KHMuYmspCiAgCiAgczEgPC0gdCh3MSkgJSolIFgudHJbLGldCiAgczEgPC0gdW5tYXRyaXgoczEpCiAgCiAgIyMgQVVDID0gUCggbGVmdC1vdXQgc2FtcGxlIGlzIHNjb3JlZCBhYm92ZSB0aGUgYmFja2dyb3VuZCApCiAgYXVjW2ldIDwtIHN1bSggczEgPiBzLmJrICkgLyBsZW5ndGgocy5iaykKICBjYXQoICJDdXJyZW50IEFVQzogIiwgYXVjW2ldLCAiXG4iICkKICBjYXQoICJBdmVyYWdlIEFVQzogIiwgbWVhbihhdWMpLCAiXG4iICkKfQojIApgYGAKCmBgYHtyfQojIyBVc2VzIHRoZSBzaWduYXR1cmUgdG8gc2NvcmUgUGFuQ2FuMzMgZGF0YQojIGxvYWQgVENHQSA0NTBLIGRhdGEgKHN1YnNldCBvZiAyMTkgcHJvYmVzIG9mIGludGVyZXN0KQpsb2FkKCJkYXRhLnBhbi5SZGEiKSAKZGF0YS5wYW4KYGBgCmBgYHtyfQpsb2FkKCJ0eXBlLmluZm8uUmRhIikgIyhjb250YWlucyB0dW1vciB0eXBlIGluZm8pCnR5cGUuaW5mbwpgYGAKICAKYGBge3IgcmVzdWx0cyA9ICJoaWRlIiwgbWVzc2FnZSA9IEZBTFNFLCB3YXJuaW5nID0gRkFMU0V9CiMgRnVuY3Rpb24gdG8gcmVwbGFjZSBOQSB2YWx1ZXMgd2l0aCBtZWRpYW4gb2YgcHJvYmUgdmFsdWVzIGJ5IHR1bW9yIHR5cGUKc291cmNlKCJyZXBsYWNlTkEuUiIpCnRlc3RzZXQgPC0gcmVwbGFjZS5OQShkYXRhLnBhbiwgdHlwZS5pbmZvLCBieT0ibWVkaWFuIikgCmBgYAoKYGBge3IgZWNobyA9IFRSVUUsIG1lc3NhZ2UgPSBGQUxTRSwgd2FybmluZyA9IEZBTFNFfQojIyBMb2FkIHRoZSBzaWduYXR1cmUKbG9hZCgicGNiYy1zdGVtc2lnLnAyMTkuUmRhIikKdyA8LSBtbSR3CgpYIDwtIHRlc3RzZXRbYXMuY2hhcmFjdGVyKG5hbWVzKHcpKSxdClggPC0gYXMubWF0cml4KFgpCgojIyBTY29yZSB2aWEgbGluZWFyIG1vZGVsCnNzIDwtIHQodykgJSolIFgKIyMgU2NhbGUgdGhlIHNjb3JlcyB0byBiZSBiZXR3ZWVuIDAgYW5kIDEKc3MgPC0gc3MgLSBtaW4oc3MpCnNzIDwtIHNzIC8gbWF4KHNzKQpzcyA8LSBhcy5kYXRhLmZyYW1lKHQoc3MpKQoKY29sbmFtZXMoc3MpIDwtICJtRE5Bc2kiICAgCnNh
dmUoc3MsIGZpbGU9IlRDR0FfbUROQXNpLlJkYSIpCmBgYAo=